import numpy as np
import pandas as pd


class Agent:
    """Static helpers that turn a pair of per-row ratings into a prediction.

    Every public method reads the ``'mrUp'`` and ``'mrDown'`` columns of
    ``data``. Per row, the larger of the two ratings plays the "conf" role and
    the smaller one the "non-conf" role; the returned value is the weight of
    the "conf" option relative to both options.

    NOTE(review): the meaning of the two columns (presumably mean ratings of
    two alternatives) is not visible in this file - confirm with the caller.
    """

    @staticmethod
    def pred_conf(data, conf_type, sampling, beta=1):
        """Dispatch to one of the prediction models.

        Args:
            data: pandas DataFrame with numeric ``'mrUp'`` and ``'mrDown'``
                columns.
            conf_type: ``'pai'`` or ``'pai2'`` select the corresponding
                softmax model; any other value (nominally ``'ev'``) selects
                the plain ratio model.
            sampling: if truthy, each prediction is randomly replaced by its
                complement (see ``_flip_sampled``).
            beta: factor applied to the option values inside the exponent of
                the softmax models; ignored by the ratio model.

        Returns:
            pandas Series indexed like ``data`` with one prediction per row.
        """
        if conf_type == 'pai':
            return Agent.pred_conf_pai(data, sampling, beta)
        if conf_type == 'pai2':
            return Agent.pred_conf_pai2(data, sampling, beta)
        # Deliberate catch-all: everything else is treated as 'ev'.
        return Agent.pred_conf_ev(data, sampling)

    @staticmethod
    def pred_conf_pai(data, sampling, beta):
        """Softmax model with fixed rewards: 1 for correct, 0 for incorrect.

        Each option's value is the rating-weighted sum of the reward
        shortfalls ``reward - r_max`` (with ``r_max = 2``); the two values are
        scaled by ``beta`` and passed through a two-option softmax.

        Args:
            data: pandas DataFrame with ``'mrUp'`` and ``'mrDown'`` columns.
            sampling: if truthy, randomly flip predictions to ``1 - p``.
            beta: scale applied to both option values before exponentiation.

        Returns:
            pandas Series with the softmax weight of the "conf" option.
        """
        r_max = 2
        r_correct = 1
        r_incorrect = 0
        high, low = Agent._split_ratings(data)
        # Open question kept from the original author: since the reward is
        # subjective, is it anything other than the expected value?
        value_conf = high * (r_correct - r_max) + low * (r_incorrect - r_max)
        value_non_conf = low * (r_correct - r_max) + high * (r_incorrect - r_max)
        pred_conf = Agent._softmax_pair(value_conf, value_non_conf, beta)
        if sampling:
            pred_conf = Agent._flip_sampled(pred_conf)
        return pred_conf

    @staticmethod
    def pred_conf_pai2(data, sampling, beta):
        """Softmax model whose rewards are the ratings themselves.

        The reward for a correct choice is the larger rating and the reward
        for an incorrect choice is the smaller rating; shortfalls are taken
        relative to ``r_max = 1``.

        Args:
            data: pandas DataFrame with ``'mrUp'`` and ``'mrDown'`` columns.
            sampling: if truthy, randomly flip predictions to ``1 - p``.
            beta: scale applied to both option values before exponentiation.

        Returns:
            pandas Series with the softmax weight of the "conf" option.
        """
        r_max = 1
        high, low = Agent._split_ratings(data)
        # Open question kept from the original author: here the expected value
        # and the reward coincide; fixed rewards of 1 / 0 are the alternative.
        value_conf = high * (high - r_max) + low * (low - r_max)
        value_non_conf = low * (high - r_max) + high * (low - r_max)
        pred_conf = Agent._softmax_pair(value_conf, value_non_conf, beta)
        if sampling:
            pred_conf = Agent._flip_sampled(pred_conf)
        return pred_conf

    @staticmethod
    def pred_conf_ev(data, sampling):
        """Ratio model: larger rating divided by the sum of both ratings.

        Args:
            data: pandas DataFrame with ``'mrUp'`` and ``'mrDown'`` columns.
            sampling: if truthy, randomly flip predictions to ``1 - p``.

        Returns:
            pandas Series with ``max / (min + max)`` per row (NaN where both
            ratings are zero).
        """
        high, low = Agent._split_ratings(data)
        pred_conf = high / (low + high)
        if sampling:
            pred_conf = Agent._flip_sampled(pred_conf)
        return pred_conf

    @staticmethod
    def _split_ratings(data):
        """Return the row-wise (larger, smaller) of ``mrUp`` and ``mrDown``."""
        avg_ratings = pd.concat([data['mrUp'], data['mrDown']], axis=1)
        return avg_ratings.max(axis='columns'), avg_ratings.min(axis='columns')

    @staticmethod
    def _softmax_pair(value_conf, value_non_conf, beta):
        """Return the two-option softmax weight of ``value_conf``.

        ``beta`` multiplies each complete option value. Scaling only part of
        a value (as plain ``beta * a * x + b * y`` would, by operator
        precedence) makes the preference vanish or invert for larger beta.
        For ``beta == 1`` both forms give identical numbers.
        """
        weight_conf = np.exp(beta * value_conf)
        weight_non_conf = np.exp(beta * value_non_conf)
        return weight_conf / (weight_non_conf + weight_conf)

    @staticmethod
    def _flip_sampled(pred_conf):
        """Replace ``p`` by ``1 - p`` wherever a uniform draw exceeds ``p``.

        One draw from ``np.random.random`` is consumed per row. The flip is
        applied through a positional boolean mask so that it works for any
        index on ``pred_conf``, not only a default 0..n-1 index.
        """
        samples = np.random.random(size=len(pred_conf))
        flip = samples > pred_conf.to_numpy()
        return pred_conf.mask(flip, 1 - pred_conf)

    # NOTE: disabled exploratory snippet, kept for reference only. It buckets
    # the entries of ``diff_aics`` (not defined in this class) by the
    # thresholds 2, 0 and -2 and prints the size of each bucket.
    #
    # print(diff_aics)
    # types = {"paimuchbetter":[], "paialittlebetter":[], "evbetter":[], "evmuchbetter":[]}
    # for i in diff_aics:
    #     if i > 2:
    #         types["paimuchbetter"].append(i)
    #     elif i > 0:
    #         types["paialittlebetter"].append(i)
    #     elif i > -2:
    #         types["evbetter"].append(i)
    #     else:
    #         types["evmuchbetter"].append(i)
    # for i in types:
    #     print(len(types[i]))